/*===================================================================
enrollments.do
	*Updated:	2022-11-15
	*Author(s):	David Phillips, Sean McConville, Grace Ortuzar, Charlie Law
	*Purpose:	Creates person-level outcomes for enrollment in HMIS
				at different time horizons from original assessment.
				For the RCT, the focal date is the RCT enrollment.
				For others, it is the assessment date.
===================================================================*/

	

*Last date covered by the outcome data: kept as text (global) and as a Stata date number (scalar)
	global enddate "5/3/2021"
	scalar define enddate_num = date("$enddate", "MDY")

	
*Import raw program enrollment data; merge in assessments
	*forward slash in the path works on every operating system and matches the other paths in this file
	clear 
	import excel "$datadir/LEO PR-VISPDAT 5.3.2021.xlsx", sheet("Programs") firstrow clear
	rename ClientsUniqueIdentifier HMISID 

	*report, then remove, records duplicated on person, start date, program type and program name
	duplicates report HMISID EnrollmentsProjectStartDate ProgramsProjectTypeCode ProgramsFullName
	duplicates drop HMISID EnrollmentsProjectStartDate ProgramsProjectTypeCode ProgramsFullName, force 
	count 

	*attach the focal date from the full assessment file (the using file)
	*original author note: everyone enrolled in a program can be matched to an assessment
	merge m:1 HMISID using "$datadir/assmt.dta", keepusing(startdate first_assessment_dt)

	*enrollment records without an assessment are removed; assessment-only rows (_merge == 2) stay in the data
	drop if _merge == 1
	drop _merge 

*Process enrollment file: calendar fields derived from the enrollment start date
	local enr_start EnrollmentsProjectStartDate
	codebook `enr_start'
	gen mm_e = month(`enr_start')
	gen q_e = quarter(`enr_start')
	gen year_e = year(`enr_start')
	order mm_e year_e, after(`enr_start')

	*running month index: 1 = July 2018, 25 = July 2020
	gen month_e = (year_e - 2018)*12 + (mm_e - 6)
	label variable month_e "Month of Enrollment"
	label values month_e months
	
	*retain enrollments that do not start before the focal date; lagged outcomes are not wanted here
	*(rows with a missing enrollment start date are retained, exactly as before)
	keep if !(EnrollmentsProjectStartDate < startdate)
	
	*the "Other" and "Services Only" project types are ignored
	drop if inlist(ProgramsProjectTypeCode, "Other", "Services Only")
	
	
	
	*0/1 flags, one per program type, built from the project type code
	local ptype ProgramsProjectTypeCode
	gen hp      = (`ptype' == "Homeless Prevention")
	gen coord   = (`ptype' == "Coordinated Entry")
	gen shelter = (`ptype' == "Emergency Shelter")
	*perm: any type code containing the text "disab"
	gen perm    = regexm(`ptype', "disab")
	gen rrh     = (`ptype' == "PH - Rapid Re-Housing")
	gen tran    = (`ptype' == "Transitional Housing")
	gen out     = (`ptype' == "Street Outreach")
	*dh: homeless prevention programs whose full name contains "HPS - DH"
	gen dh      = (hp == 1 & regexm(ProgramsFullName, "HPS - DH"))
	*big: rapid re-housing, perm or transitional housing
	gen big     = (perm | rrh | tran)
	*non_hp: any non-blank program type other than homeless prevention
	gen non_hp  = (!hp & `ptype' != "")
	
	*imputing missing exit dates from the typical duration for the program type
	*NOTE(review): the original comment said "95 percentile of duration", but the code below
	*uses the MEAN duration within program type - confirm which statistic is intended
	*days = observed length of stay; missing when the exit (or start) date is missing
	gen days = EnrollmentsProjectExitDate - EnrollmentsProjectStartDate
	su days, det
	*mean observed duration within each program type
	egen days_prog = mean(days), by(ProgramsProjectTypeCode)
	*fill missing durations with the program-type mean, then rebuild the missing exit dates from them
	replace days = days_prog if days == .
	replace EnrollmentsProjectExitDate = EnrollmentsProjectStartDate + days if EnrollmentsProjectExitDate == .
	*second summary shows the duration distribution after imputation
	su days, det
	drop days days_prog
	
	*days enrolled inside each horizon (30 to 360 days after the focal date), by program type
	*a record contributes the overlap between [enrollment start, exit date + 1) and
	*[focal date, focal date + 30*m), floored at zero; records of other program types get missing
	local groups hp coord shelter perm rrh tran out dh non_hp big
	
	foreach grp of local groups {
		forvalues m = 1/12 {
			local horizon = 30*`m'
			gen `grp'_`m'mo_days = max(0, min(EnrollmentsProjectExitDate + 1, startdate + `horizon') - max(EnrollmentsProjectStartDate, startdate)) if `grp' == 1
		}
	}
	
	*0/1 flag: record has at least some enrolled time inside the horizon
	foreach grp of local groups {
		forvalues m = 1/12 {
			gen `grp'_`m'mo = (`grp'_`m'mo_days != . & `grp'_`m'mo_days > 0)
		}
	}
	
	*two extra versions of the non-HP outcome:
	*	non_hp3  - only counts time from day 90 after the focal date onward
	*	non_hpfl - only counts time inside the m-th 30-day window itself
	forvalues m = 1/12 {
		local win_end   = 30*`m'
		local win_start = 30*(`m' - 1)

		gen non_hp3_`m'mo_days = max(0, min(EnrollmentsProjectExitDate + 1, startdate + `win_end') - max(EnrollmentsProjectStartDate, startdate + 90)) if non_hp == 1
		gen non_hp3_`m'mo = (non_hp3_`m'mo_days != . & non_hp3_`m'mo_days > 0)

		gen non_hpfl_`m'mo_days = max(0, min(EnrollmentsProjectExitDate + 1, startdate + `win_end') - max(EnrollmentsProjectStartDate, startdate + `win_start')) if non_hp == 1
		gen non_hpfl_`m'mo = (non_hpfl_`m'mo_days != . & non_hpfl_`m'mo_days > 0)
	}
	
	
	*one row per person: a flag is 1 if any record has it, day counts are added up across records
	collapse (max) *_*mo ///
		(sum) *_*mo_days, ///
		by(HMISID)
	
	
	*zeros for those with no program records; missing values for those going beyond timeframe of the data
	*the assessment file supplies the full list of people and each person's focal date
	merge m:1 HMISID using "$datadir/assmt.dta", keepusing(startdate)
	
	foreach var in hp coord shelter perm rrh tran out dh non_hp big non_hp3 non_hpfl {
		forvalues y = 1/12 {
			*assessment-only people (_merge == 2) have no retained enrollment records
			replace `var'_`y'mo_days = 0 if _merge == 2
			replace `var'_`y'mo = 0 if _merge == 2
			
			*horizon ends after the last date covered by the data: outcome is not observed
			*scalar() stops a variable name or abbreviation from being picked up instead of the scalar
			replace `var'_`y'mo_days = . if startdate + 30*`y' > scalar(enddate_num)
			replace `var'_`y'mo = . if startdate + 30*`y' > scalar(enddate_num)
		}
	}
	drop _merge startdate
	
	*shorten the "mo" suffix to "m" for every outcome group in the list below
	*NOTE(review): hp is not in this list, so the hp variables keep their "mo" names - confirm this is intended
	foreach grp in coord shelter perm rrh tran out dh non_hp big non_hp3 non_hpfl {
		forvalues m = 1/12 {
			rename (`grp'_`m'mo `grp'_`m'mo_days) (`grp'_`m'm `grp'_`m'm_days)
		}
	}
	
	*write the person-level enrollment outcomes to disk
	save "$datadir/enrollments.dta", replace
	
	
	
	
	